# Chunk defaults for the knitted report: hide messages and warnings.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(patchwork)
library(GGally)
library(dplyr)
library(tidyverse)
library(cowplot)
library(plotly)
# Load the corruption data set and coerce the six indicator columns to
# numeric. Paths are built with file.path() so the script is not tied to the
# Windows backslash separator.
corruption <- read_csv(file.path("data", "corruption_EDA.csv")) |>
  mutate(
    across(
      c(
        corruption_index,
        government_effectiveness,
        political_stability_and_absence_of_violence_terrorism,
        regulatory_quality,
        rule_of_law,
        voice_and_accountability
      ),
      as.numeric
    )
  )

# Wide CPI table; it is pivoted on columns named 2005..2022 further down.
cpi_year <- read_csv(file.path("data", "cpi_data_year.csv"))
# Mean corruption index per year, drawn as a red point-and-line series.
# Each value is rounded to 4 significant digits before averaging.
corruption |>
  group_by(year) |>
  summarise(
    mean = mean(signif(as.numeric(corruption_index), 4), na.rm = TRUE)
  ) |>
  ggplot(aes(x = year, y = mean)) +
  geom_point(color = "red") +
  geom_line(aes(group = 1), color = "red")

# Reshape the per-year columns 2005..2022 into long form (year, cpi), then
# plot the mean cpi of each year as a red point-and-line series.
cpi_year |>
  pivot_longer(
    cols = "2005":"2022",
    names_to = "year",
    values_to = "cpi"
  ) |>
  group_by(year) |>
  summarise(mean = mean(cpi, na.rm = TRUE)) |>
  ggplot(aes(x = year, y = mean)) +
  geom_point(color = "red") +
  geom_line(aes(group = 1), color = "red")

# Corruption index against government effectiveness: one scatter panel per
# year (2019-2022), each with a linear fit and its confidence band.
# The stray unnamed `9` previously passed to aes() has been removed; it was
# not a valid aesthetic mapping.
myplots <- lapply(2019:2022, function(yr) {
  corruption |>
    filter(year == yr) |>
    ggplot(
      aes(
        x = as.numeric(government_effectiveness),
        y = as.numeric(corruption_index)
      )
    ) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    labs(title = sprintf("year %s", yr))
})
plot_row1 <- plot_grid(myplots[[1]], myplots[[2]])
plot_row2 <- plot_grid(myplots[[3]], myplots[[4]])
# Figure title drawn on its own canvas.
title <- ggdraw() +
  draw_label(
    "Scatter Plot for corruption and effectiveness",
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
# Stack title and the two rows; the title strip gets a tenth of a row height.
plot_grid(
  title, plot_row1, plot_row2,
  ncol = 1, label_size = 12, rel_heights = c(0.1, 1, 1)
)

# Corruption index against political stability: one scatter panel per year
# (2019-2022) with a linear fit; both axes rounded to 4 significant digits.
myplots <- vector("list", 4)
for (i in 2019:2022) {
  myplots[[i - 2018]] <- corruption |>
    filter(year == i) |>
    ggplot(
      aes(
        x = signif(
          as.numeric(political_stability_and_absence_of_violence_terrorism),
          4
        ),
        y = signif(as.numeric(corruption_index), 4)
      )
    ) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    labs(title = paste("year", i))
}
plot_row1 <- plot_grid(plotlist = myplots[1:2])
plot_row2 <- plot_grid(plotlist = myplots[3:4])
# Figure title on its own canvas; the 7pt left margin lines it up with the
# left edge of the first panel.
title <- ggdraw() +
  draw_label(
    "Scatter Plot for corruption and stability",
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(plot.margin = margin(t = 0, r = 0, b = 0, l = 7))
plot_grid(
  title, plot_row1, plot_row2,
  ncol = 1, label_size = 12, rel_heights = c(0.1, 1, 1)
)

# Corruption index against regulatory quality: one scatter panel per year
# (2019-2022) with a linear fit; both axes rounded to 4 significant digits.
myplots <- vector("list", 4)
for (i in 2019:2022) {
  myplots[[i - 2018]] <- corruption |>
    filter(year == i) |>
    ggplot(
      aes(
        x = signif(as.numeric(regulatory_quality), 4),
        y = signif(as.numeric(corruption_index), 4)
      )
    ) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    labs(title = paste("year", i))
}
plot_row1 <- plot_grid(plotlist = myplots[1:2])
plot_row2 <- plot_grid(plotlist = myplots[3:4])
# Figure title on its own canvas; the 7pt left margin lines it up with the
# left edge of the first panel.
title <- ggdraw() +
  draw_label(
    "Scatter Plot for corruption and regulatory quality",
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(plot.margin = margin(t = 0, r = 0, b = 0, l = 7))
plot_grid(
  title, plot_row1, plot_row2,
  ncol = 1, label_size = 12, rel_heights = c(0.1, 1, 1)
)

# Corruption index against rule of law: one scatter panel per year
# (2019-2022) with a linear fit; both axes rounded to 4 significant digits.
myplots <- lapply(2019:2022, function(yr) {
  corruption |>
    filter(year == yr) |>
    ggplot(
      aes(
        x = signif(as.numeric(rule_of_law), 4),
        y = signif(as.numeric(corruption_index), 4)
      )
    ) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    labs(title = sprintf("year %s", yr))
})
plot_row1 <- plot_grid(myplots[[1]], myplots[[2]])
plot_row2 <- plot_grid(myplots[[3]], myplots[[4]])
# Figure title. It previously read "regulatory quality" (copied from the
# preceding figure) although the x axis here is rule_of_law.
title <- ggdraw() +
  draw_label(
    "Scatter Plot for corruption and rule of law",
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
plot_grid(
  title, plot_row1, plot_row2,
  ncol = 1, label_size = 12, rel_heights = c(0.1, 1, 1)
)

# Corruption index against voice and accountability: one scatter panel per
# year (2019-2022) with a linear fit; both axes rounded to 4 significant
# digits.
myplots <- vector("list", 4)
for (i in 2019:2022) {
  myplots[[i - 2018]] <- corruption |>
    filter(year == i) |>
    ggplot(
      aes(
        x = signif(as.numeric(voice_and_accountability), 4),
        y = signif(as.numeric(corruption_index), 4)
      )
    ) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    labs(title = paste("year", i))
}
plot_row1 <- plot_grid(plotlist = myplots[1:2])
plot_row2 <- plot_grid(plotlist = myplots[3:4])
# Figure title on its own canvas; the 7pt left margin lines it up with the
# left edge of the first panel.
title <- ggdraw() +
  draw_label(
    "Scatter Plot for corruption against voice and accountability",
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(plot.margin = margin(t = 0, r = 0, b = 0, l = 7))
plot_grid(
  title, plot_row1, plot_row2,
  ncol = 1, label_size = 12, rel_heights = c(0.1, 1, 1)
)

# Mean corruption index per year within each development category, one
# coloured point-and-line series per category.
corruption |>
  group_by(year, development) |>
  summarize(
    mean_cpi = mean(signif(as.numeric(corruption_index), 4), na.rm = TRUE)
  ) |>
  ggplot(
    aes(x = year, y = mean_cpi, group = development, color = development)
  ) +
  geom_point(shape = 19, size = 3) +
  geom_line()

# Number of distinct countries in each development category.
development_prop <- corruption |>
  group_by(development) |>
  summarize(count = n_distinct(country_name))
library(RColorBrewer)
myPalette <- brewer.pal(3, "Set2")
# Slice labels are taken from the summarised data so each label always
# matches its count. The previous hard-coded labels relied on group order
# and misspelled "Developed".
pie(
  pull(development_prop, count),
  labels = pull(development_prop, development),
  border = "white",
  col = myPalette
)

# Left panel: bar chart of the mean corruption index per continent.
mean_cpi <- corruption |>
  group_by(continent) |>
  summarize(
    mean_cpi = mean(as.numeric(corruption_index), na.rm = TRUE)
  ) |>
  ggplot(aes(x = continent, y = mean_cpi, fill = continent)) +
  geom_bar(stat = "identity")

# Right panel: density of the 2022 corruption index, filled by continent.
cpi_density <- corruption |>
  filter(year == 2022) |>
  group_by(continent) |>
  ggplot(aes(x = corruption_index)) +
  geom_density(aes(fill = continent), alpha = 0.15) +
  labs(x = "cpi", y = "Density")

# Combine the two panels side by side (patchwork `+`) and display them.
cpi_continent <- mean_cpi + cpi_density
cpi_continent

# Left panel: bar chart of the mean gdp per continent.
mean_gdp <- corruption |>
  group_by(continent) |>
  summarize(gdp = mean(as.numeric(gdp), na.rm = TRUE)) |>
  ggplot(aes(x = continent, y = gdp, fill = continent)) +
  geom_bar(stat = "identity")

# Right panel: histogram of 2022 gdp, filled by continent.
# geom_histogram() draws counts, so the y axis is labelled "Count"; it was
# previously labelled "Density", which did not match what is plotted.
# NOTE(review): the variable name suggests a density plot may have been
# intended (the CPI figure uses geom_density) - confirm.
gdp_density <- corruption |>
  filter(year == 2022) |>
  group_by(continent) |>
  ggplot(aes(x = gdp)) +
  geom_histogram(aes(fill = continent), alpha = 0.15) +
  labs(x = "gdp", y = "Count")
gdp_density

# Combine the two panels side by side (patchwork `+`) and display them.
gdp_continent <- mean_gdp + gdp_density
gdp_continent

library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)
# Box plot of 2022 population per continent, viridis colours, no legend.
corruption |>
  filter(year == 2022) |>
  select(continent, population) |>
  ggplot(
    aes(x = continent, y = population, fill = continent, color = continent)
  ) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(legend.position = "none")

# Box plot of population per continent over all rows of `corruption`, with
# the individual observations overlaid as small red jittered points.
corruption |>
  ggplot(aes(x = continent, y = population, fill = continent)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
  geom_jitter(color = "red", size = 0.4, alpha = 0.9) +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("population of countries") +
  xlab("")

# Corruption index against gdp / population: one scatter panel per year
# (2019-2022), each with a linear fit and its confidence band.
# The stray unnamed `9` previously passed to aes() has been removed; it was
# not a valid aesthetic mapping.
myplots <- lapply(2019:2022, function(yr) {
  corruption |>
    filter(year == yr) |>
    ggplot(
      aes(x = gdp / population, y = as.numeric(corruption_index))
    ) +
    geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    labs(title = sprintf("year %s", yr))
})
plot_row1 <- plot_grid(myplots[[1]], myplots[[2]])
plot_row2 <- plot_grid(myplots[[3]], myplots[[4]])
# Figure title. It previously read "corruption and effectiveness" (copied
# from the first figure) although the x axis here is gdp / population.
title <- ggdraw() +
  draw_label(
    "Scatter Plot for corruption and GDP per capita",
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
plot_grid(
  title, plot_row1, plot_row2,
  ncol = 1, label_size = 12, rel_heights = c(0.1, 1, 1)
)

# Min-max rescale a numeric vector to the [0, 1] range.
#
# x      Numeric vector.
# na.rm  If TRUE (default), missing values are ignored when finding the
#        minimum and maximum; they stay NA in the result. If FALSE, any NA
#        in `x` makes the whole result NA.
#
# Returns a numeric vector of the same length as `x`. `na.rm` was previously
# accepted but never used, so a single NA turned every element into NA.
normalize <- function(x, na.rm = TRUE) {
  bounds <- range(x, na.rm = na.rm)
  (x - bounds[1]) / (bounds[2] - bounds[1])
}
# Print the 2022 rows, restricted to the four columns used below.
select(
  filter(corruption, year == 2022),
  corruption_index, country_name, population, gdp
)
## # A tibble: 214 × 4
## corruption_index country_name population gdp
## <dbl> <chr> <dbl> <dbl>
## 1 -1.18 Afghanistan 41128771 NA
## 2 -0.408 Albania 2775634 18.9
## 3 -0.638 Algeria 44903225 192
## 4 1.27 American Samoa 44273 NA
## 5 1.27 Andorra 79824 3.35
## 6 -0.602 Angola 35588987 107
## 7 1.27 Anguilla NA NA
## 8 0.311 Antigua and Barbuda 93763 1.76
## 9 -0.447 Argentina 46234830 633
## 10 0.0280 Armenia 2780469 19.5
## # ℹ 204 more rows
# Extract three columns as plain vectors and rescale each one:
#   pop - min-max scaled via scale() (result is a one-column matrix)
#   gdp - min-max scaled via normalize()
#   cpi - centred and scaled with scale() defaults
pop <- corruption[["population"]]
gdp <- corruption[["gdp"]]
cpi <- corruption[["corruption_index"]]
pop <- scale(
  pop,
  center = min(pop, na.rm = TRUE),
  scale = diff(range(pop, na.rm = TRUE))
)
gdp <- normalize(gdp)
cpi <- scale(as.numeric(cpi))
# Print the full data set.
corruption
## # A tibble: 5,136 × 15
## country_name country_code year corruption_index government_effectiveness
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1996 -1.29 -2.18
## 2 Afghanistan AFG 1998 -1.18 -2.10
## 3 Afghanistan AFG 2000 -1.27 -2.17
## 4 Afghanistan AFG 2002 -1.25 -1.59
## 5 Afghanistan AFG 2003 -1.34 -1.18
## 6 Afghanistan AFG 2004 -1.35 -0.945
## 7 Afghanistan AFG 2005 -1.45 -1.23
## 8 Afghanistan AFG 2006 -1.45 -1.47
## 9 Afghanistan AFG 2007 -1.61 -1.44
## 10 Afghanistan AFG 2008 -1.67 -1.53
## # ℹ 5,126 more rows
## # ℹ 10 more variables:
## # political_stability_and_absence_of_violence_terrorism <dbl>,
## # regulatory_quality <dbl>, rule_of_law <dbl>,
## # voice_and_accountability <dbl>, gdp <dbl>, continent <chr>,
## # development <chr>, latitude <dbl>, longitude <dbl>, population <dbl>
# Mean corruption index for every (population bucket, development level)
# pair, shown as a 3 x 3 surface.
#   pop:         3 = population > 100M, 2 = > 30M, 1 = everything else
#                (rows with missing population also fall into bucket 1)
#   development: 1 = Least Developed, 2 = Developing, 3 = Developed
dev_pop <- corruption |>
  mutate(
    pop = case_when(
      population > 100000000 ~ 3,
      population > 30000000 ~ 2,
      .default = 1
    ),
    development = case_when(
      development == "Least Developed" ~ 1,
      development == "Developing" ~ 2,
      development == "Developed" ~ 3
    )
  ) |>
  # Rows whose development label matches none of the three levels get NA
  # above and cannot be placed in the matrix.
  filter(!is.na(development)) |>
  group_by(pop, development) |>
  summarize(
    cpi = mean(as.numeric(corruption_index), na.rm = TRUE),
    .groups = "drop"
  )
pop <- pull(dev_pop, pop)
dev <- pull(dev_pop, development)
cpi <- pull(dev_pop, cpi)
# Rows = population bucket, columns = development level. Each mean is placed
# by its own (pop, development) codes instead of by row position, so a
# missing combination leaves its cell NA rather than shifting every later
# value into the wrong cell.
ma_dev_pop <- matrix(NA_real_, nrow = 3, ncol = 3)
ma_dev_pop[cbind(pop, dev)] <- cpi
fig <- plot_ly(z = ~ma_dev_pop)
fig <- fig %>% add_surface()
fig
# Turn 2022 gdp, population and corruption index into integer codes on a
# log scale so that (population, gdp) can be used as matrix coordinates.
# log() of a negative corruption index is NaN, and drop_na() then removes
# those rows along with any row that has a missing value in any column.
log_pop <- corruption |>
  filter(year == 2022) |>
  mutate(
    gdp = round(10 * log(gdp), 0),
    population = 4 * round(1 * log(population), 0),
    corruption_index = 41 + round(10 * log(as.numeric(corruption_index)), 0)
  ) |>
  drop_na() |>
  select(country_name, gdp, population, corruption_index)
ma_log <- matrix(0, nrow = 110, ncol = 110)
pop <- pull(log_pop, population)
gdp <- pull(log_pop, gdp)
# pull() takes a column name; the earlier `as.numeric(corruption_index)`
# expression only worked by accident.
cpi <- pull(log_pop, corruption_index)
# NOTE(review): bare literal with no visible purpose; kept so the recorded
# output below stays in sync - confirm whether it can be removed.
356493
## [1] 356493
# Only coordinates inside the matrix are written. A negative index would
# otherwise overwrite every other cell of the row or column, and an infinite
# or too-large one would raise an error.
in_bounds <- is.finite(pop) & is.finite(gdp) &
  pop >= 1 & pop <= nrow(ma_log) &
  gdp >= 1 & gdp <= ncol(ma_log)
for (i in seq_along(pop)) {
  if (in_bounds[i]) {
    ma_log[pop[i], gdp[i]] <- cpi[i]
  }
  print(cpi[i])
}
if (!all(in_bounds)) {
  warning(
    sum(!in_bounds), " row(s) fall outside the 110 x 110 grid and were skipped",
    call. = FALSE
  )
}
## [1] 43
## [1] 29
## [1] 5
## [1] 47
## [1] 43
## [1] 43
## [1] 21
## [1] 44
## [1] 45
## [1] 37
## [1] 43
## [1] 41
## [1] 46
## [1] 41
## [1] 0
## [1] 33
## [1] 22
## [1] 32
## [1] 37
## [1] 50
## [1] 35
## [1] 45
## [1] 32
## [1] 49
## [1] 43
## [1] 36
## [1] 47
## [1] 8
## [1] 34
## [1] 46
## [1] 45
## [1] 46
## [1] 39
## [1] 35
## [1] 45
## [1] 15
## [1] 27
## [1] 38
## [1] 20
## [1] 37
## [1] 38
## [1] 47
## [1] 40
## [1] 27
## [1] 27
## [1] 31
## [1] 32
## [1] 34
## [1] 26
## [1] 36
## [1] 48
## [1] 49
## [1] 48
## [1] 12
## [1] 34
## [1] 38
## [1] 19
## [1] 39
## [1] 0
## [1] 35
## [1] 35
## [1] 28
## [1] 31
## [1] 46
## [1] 48
## [1] 26
## [1] 38
## [1] 37
## [1] 32
## [1] 36
## [1] 38
## [1] 48
## [1] 48
## [1] 37
## [1] 42
## [1] 46
## [1] 42
## [1] 46
# Render the ma_log matrix as an interactive 3-D surface.
fig <- add_surface(plot_ly(z = ~ma_log))
fig
# Pairwise plot matrix of the corruption index, the five governance
# indicators and continent.
# NOTE(review): `subtitle` does not appear to be a documented ggpairs()
# argument - confirm it has any effect on the figure.
corruption |>
  select(
    corruption_index,
    political_stability_and_absence_of_violence_terrorism,
    government_effectiveness,
    regulatory_quality,
    rule_of_law,
    voice_and_accountability,
    continent
  ) |>
  ggpairs(
    mapping = ggplot2::aes(alpha = 0.1),
    title = "Correlations Between Key factors",
    subtitle = "By Continents"
  ) +
  scale_fill_discrete() +
  theme(
    # Vertical x tick labels so long values do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
